Amount of code and type system

High profile repos contain more code and a higher percentage of it is statically typed.

# Statically typed code vs. total code per repo, both axes in log10(bytes).
# Both byte counts must be positive so that log10() is finite for every point.
# (The filter previously tested the statically typed byte count twice and
# never checked the total size that is plotted on the x axis.)
plt_data_type_system <- repo_data_all %>%
  filter(total_bytes_no_data_type_system_static > 0 &
           total_file_size_no_data > 0)
ggplot(plt_data_type_system,
       aes(y = log10(total_bytes_no_data_type_system_static),
           x = log10(total_file_size_no_data),
           col = is_high_profile)) +
  geom_point(size = 1) +
  # Draw the high profile repos a second time so they sit on top of the other
  # points; taken from the filtered data so no zero-byte rows reach log10()
  geom_point(data = subset(plt_data_type_system, is_high_profile), size = 1) +
  ylab("Total bytes of statically typed code (log10)") +
  xlab("Total bytes of code (log10)") +
  # Reference line y = x (statically typed bytes equal to total bytes)
  geom_abline(slope = 1, intercept = 0) +
  scale_color_manual(values = c(color_main, color_high_prof),
                     labels = c("Main repos", "High profile repos")) +
  theme_bw() +
  guides(color = guide_legend(title = "Dataset")) +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 12)) +
  xlim(0, 8) +
  ylim(0, 8)
## Warning: Removed 1 rows containing missing values (geom_point).

Team size and outside committers

High profile repos have more developers and more outside developers.

# Count the repos sharing each (outside authors, commit authors, dataset)
# combination so that overlapping points can be sized by how many repos they
# represent. The result is ungrouped so later steps see a plain table.
plt_data_committers <- repo_data_all %>%
  select(num_non_committing_authors, commit_authors, is_high_profile) %>%
  group_by(num_non_committing_authors, commit_authors, is_high_profile) %>%
  dplyr::summarize(Num_repos = n()) %>%
  ungroup()
ggplot(plt_data_committers,
       aes(y = num_non_committing_authors,
           x = commit_authors,
           col = is_high_profile)) +
  geom_point(aes(size = Num_repos)) +
  # Second pass over the high profile rows so they are drawn last (on top)
  geom_point(data = subset(plt_data_committers, is_high_profile),
             aes(size = Num_repos)) +
  ylab("Number of non-committing commit authors") +
  xlab("Number of commit authors") +
  scale_color_manual(values = c(color_main, color_high_prof),
                     labels = c("Main repos", "High profile repos")) +
  theme_bw() +
  guides(color = guide_legend(title = "Dataset")) +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 12)) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  ylim(0, max(repo_data_all$num_non_committing_authors, na.rm = TRUE)) +
  scale_x_log10() +
  # Reference curve y = x; the part above the y limit is dropped by ggplot
  stat_function(fun = function(x) {x}, geom = "line", color = "black")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 39 rows containing missing values (geom_path).

Number of files and file size

High profile repos have more source files but similar file sizes.

# Repos with at least one source file and a positive mean file length; both
# quantities go on log10 axes below.
plt_data_num_files <- filter(repo_data_all,
                             num_files_no_data > 0,
                             mean_lines_code_no_data > 0)
# Powers of ten from 1 to 10000, shared by both axes
breaks <- 10^(0:4)
ggplot(plt_data_num_files,
       aes(x = num_files_no_data,
           y = mean_lines_code_no_data,
           col = is_high_profile)) +
  geom_point(size = 1) +
  # High profile repos are drawn again so they end up on top
  geom_point(data = filter(plt_data_num_files, is_high_profile), size = 1) +
  scale_x_log10(breaks = breaks) +
  scale_y_log10(breaks = breaks) +
  scale_color_manual(values = c(color_main, color_high_prof),
                     labels = c("Main repos", "High profile repos")) +
  labs(x = "Total source files",
       y = "Mean lines of code per source file") +
  guides(color = guide_legend(title = "Dataset")) +
  theme_bw() +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 12))

Topics

Some paper topics are associated with higher community engagement, repo size, and commit activity.

# Per-topic means of engagement, size and activity metrics, drawn as one
# facet per metric with one bar per topic.
as.tbl(repo_data_main) %>%
  select(commits, commit_authors, forks_count, subscribers_count,
         watchers_count, num_citations_per_week_pmc_minus_2_years,
         total_file_size_no_data, num_files_no_data, contains("topic")) %>%
  # Long format: one row per (repo, topic column); the metrics stay as ids
  melt(id.vars = c("commits", "commit_authors", "forks_count",
                   "subscribers_count", "watchers_count",
                   "num_citations_per_week_pmc_minus_2_years",
                   "total_file_size_no_data", "num_files_no_data")) %>%
  # Keep only the rows whose topic indicator is TRUE
  filter(value) %>%
  as.tbl() %>%
  select(-value) %>%
  dplyr::rename(Topic = variable) %>%
  group_by(Topic) %>%
  # na.rm = TRUE (not T: T is a reassignable variable, TRUE is a constant)
  dplyr::summarize(
    `Mean commits` = mean(commits, na.rm = TRUE),
    `Mean commit authors` = mean(commit_authors, na.rm = TRUE),
    `Mean forks` = mean(forks_count, na.rm = TRUE),
    `Mean subscribers` = mean(subscribers_count, na.rm = TRUE),
    `Mean watchers` = mean(watchers_count, na.rm = TRUE),
    `Mean PMC citations / week` =
      mean(num_citations_per_week_pmc_minus_2_years, na.rm = TRUE),
    `Mean megabytes of code` =
      mean(total_file_size_no_data, na.rm = TRUE) / 1000000,
    `Mean number of files` = mean(num_files_no_data, na.rm = TRUE)
  ) %>%
  melt(id.vars = "Topic") %>%
  # Turn column names into display labels: drop the "topic_" prefix, replace
  # underscores by spaces, and restore the hyphen in "RNA-seq"
  mutate(Topic = gsub("topic_", "", Topic),
         Topic = gsub("_", " ", Topic),
         Topic = gsub("RNA.seq", "RNA-seq", Topic)) %>%
  ggplot(aes(x = variable, y = value, fill = factor(Topic))) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_bw() +
  guides(fill = guide_legend(title = "Abstract includes topic")) +
  theme(legend.text = element_text(size = 10),
        legend.title = element_text(size = 11),
        axis.text.y = element_text(size = 10),
        axis.text.x = element_blank(),
        axis.title = element_blank(),
        strip.text = element_text(size = 10)) +
  scale_fill_brewer(palette = "Dark2") +
  facet_wrap(~variable, scales = "free", ncol = 2)

Commits after publication

Committing to the repo after publication is associated with more community engagement, more development activity, and more citations.

# Long-format table of repo metrics, keyed by whether the repo received
# commits after the article appeared in PubMed. Columns are picked and given
# their display names in a single select(); repos with an unknown flag are
# dropped before melting.
plt_data_commits_after_pub <- as.tbl(repo_data_main) %>%
  select(`Total commits` = commits,
         `Commit authors` = commit_authors,
         `Total forks` = forks_count,
         `Total subscribers` = subscribers_count,
         `Total watchers` = watchers_count,
         `PMC citations / week` = num_citations_per_week_pmc_minus_2_years,
         `Commit message len` = mean_commit_message_len,
         `Pct outside commits` = pct_commits_diff_author_committer,
         `Outside cmt authors` = num_non_committing_authors,
         `Commits after\npublication` = commits_after_article_in_pubmed) %>%
  filter(!is.na(`Commits after\npublication`)) %>%
  melt(id.vars = "Commits after\npublication")

# Get smallest positive value of each variable so we can take logs
min_pos <- plt_data_commits_after_pub %>%
  filter(value > 0) %>%
  group_by(variable) %>%
  dplyr::summarize(min_pos = min(value))

# Remove top outliers for plot
p_outlier <- 1 # 1 means no filtering for outliers
outlier_cutoff <- plt_data_commits_after_pub %>%
  group_by(variable) %>%
  dplyr::summarize(outlier_cutoff = quantile(value, probs = p_outlier,
                                             na.rm = TRUE))

# Attach the per-variable floor and cutoff to every row, then:
#  - replace 0's and NA's by the minimum positive value of the variable
#  - drop rows above the outlier cutoff
# The replacement is vectorized on the numeric columns. Going through
# apply() would coerce each row of the data frame to character and parse the
# numbers back, which can change their value and wrongly drop the largest
# row at the cutoff comparison.
# Inside mutate()/filter(), min_pos and outlier_cutoff refer to the joined
# columns, not to the summary tables of the same name.
plt_data_commits_after_pub <- plt_data_commits_after_pub %>%
  left_join(min_pos, by = "variable") %>%
  left_join(outlier_cutoff, by = "variable") %>%
  mutate(value_pos = ifelse(is.na(value), min_pos, pmax(value, min_pos))) %>%
  filter(value_pos <= outlier_cutoff) %>%
  select(`Commits after\npublication`, variable, value_pos)

# One boxplot facet per metric, split by the "commits after publication"
# flag, on a log10 y axis. The x axis only repeats the facet title, so its
# text and all axis titles are hidden.
ggplot(plt_data_commits_after_pub,
       aes(x = variable,
           y = value_pos,
           fill = `Commits after\npublication`)) +
  geom_boxplot() +
  scale_y_log10() +
  facet_wrap(~variable, scales = "free", ncol = 3) +
  theme_bw() +
  theme(axis.title = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 10),
        strip.text = element_text(size = 10),
        legend.title = element_text(size = 11),
        legend.text = element_text(size = 10))

Outside contributors

# Forks, subscribers and watchers against the number of outside commit
# authors. Every count is shifted by one so zeros can be shown on log10 axes.
# Identical (x, dataset, measure, y) combinations are collapsed into a single
# row that records how many repos it stands for.
plt_data_outside_contrib <- as.tbl(repo_data_all) %>%
  transmute(`Total forks + 1` = forks_count + 1,
            `Total subscribers + 1` = subscribers_count + 1,
            `Total watchers + 1` = watchers_count + 1,
            `Outside commit authors + 1` = num_non_committing_authors + 1,
            is_high_profile) %>%
  melt(id.vars = c("Outside commit authors + 1", "is_high_profile")) %>%
  group_by(`Outside commit authors + 1`, is_high_profile, variable, value) %>%
  dplyr::summarize(`Num repos` = n())

# The aesthetics are shared by both point layers, so they are declared once
ggplot(plt_data_outside_contrib,
       aes(x = `Outside commit authors + 1`,
           y = value,
           col = is_high_profile,
           size = `Num repos`)) +
  geom_point() +
  # High profile rows again, so they are drawn over the main repos
  geom_point(data = subset(plt_data_outside_contrib, is_high_profile)) +
  facet_wrap(~variable, scales = "free", ncol = 3) +
  scale_x_log10() +
  scale_y_log10(breaks = 10^(0:4)) +
  scale_color_manual(values = c(color_main, color_high_prof),
                     labels = c("Main repos", "High profile repos")) +
  guides(color = guide_legend(title = "Dataset")) +
  theme_bw() +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        strip.text = element_text(size = 11))

Commit authors and community

# Forks, subscribers and watchers (each shifted by one for the log10 y axis)
# against the number of commit authors. Repos with identical values in every
# column are collapsed into one row with a repo count before melting the
# three engagement measures into long format.
plt_data_commit_authors <- as.tbl(repo_data_all) %>%
  transmute(`Total forks + 1` = forks_count + 1,
            `Total subscribers + 1` = subscribers_count + 1,
            `Total watchers + 1` = watchers_count + 1,
            `Commit authors` = commit_authors,
            `High profile` = is_high_profile) %>%
  group_by(`Total forks + 1`, `Total subscribers + 1`, `Total watchers + 1`,
           `Commit authors`, `High profile`) %>%
  dplyr::summarize(`Num repos` = n()) %>%
  melt(id.vars = c("Commit authors", "High profile", "Num repos"))

# Both point layers use the same mapping, declared once on the plot
ggplot(plt_data_commit_authors,
       aes(x = `Commit authors`,
           y = value,
           col = `High profile`,
           size = `Num repos`)) +
  geom_point() +
  # Second layer: high profile rows only, drawn last so they stay visible
  geom_point(data = subset(plt_data_commit_authors, `High profile`)) +
  facet_wrap(~variable, scales = "free", ncol = 3) +
  scale_x_log10() +
  scale_y_log10(breaks = 10^(0:3)) +
  scale_color_manual(values = c(color_main, color_high_prof),
                     labels = c("Main repos", "High profile repos")) +
  guides(color = guide_legend(title = "Dataset")) +
  theme_bw() +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        axis.title.y = element_blank(),
        strip.text = element_text(size = 11))
## Warning: Removed 6 rows containing missing values (geom_point).

Commits

# Commit activity measures against the total number of commits.
# Columns are selected under their display names; the "months without
# commits" measure is shifted by one so zeros can appear on the log10 y axis.
# Repos with identical values in every column are collapsed into one row
# with a repo count, then the measures are melted into long format.
# Note: this overwrites plt_data_commit_authors from the previous section.
plt_data_commit_authors <- as.tbl(repo_data_all) %>%
  select(`Total commits` = commits,
         `Mean commits/month` = mean_commits_per_month,
         `Max cons. months with commits` = consecutive_months_with_commits,
         `Project duration (days)` = commit_span_days,
         `Mean new files per month` = mean_files_added_per_month,
         `Days with new files added` = num_days_new_files_added,
         `1 + max cons. months no commits` = consecutive_months_no_commits,
         `High profile` = is_high_profile) %>%
  mutate(`1 + max cons. months no commits` =
           `1 + max cons. months no commits` + 1) %>%
  group_by(`Total commits`, `Mean commits/month`,
           `Max cons. months with commits`, `Project duration (days)`,
           `Mean new files per month`, `Days with new files added`,
           `1 + max cons. months no commits`, `High profile`) %>%
  dplyr::summarize(`Num repos` = n()) %>%
  melt(id.vars = c("Total commits", "High profile", "Num repos"))

# Shared mapping for both point layers
ggplot(plt_data_commit_authors,
       aes(x = `Total commits`,
           y = value,
           col = `High profile`,
           size = `Num repos`)) +
  geom_point() +
  # High profile rows once more so they are not covered by the main repos
  geom_point(data = subset(plt_data_commit_authors, `High profile`)) +
  facet_wrap(~variable, scales = "free", ncol = 3) +
  scale_x_log10(breaks = 10^(0:4)) +
  scale_y_log10(breaks = 10^(0:3)) +
  scale_color_manual(values = c(color_main, color_high_prof),
                     labels = c("Main repos", "High profile repos")) +
  guides(color = guide_legend(title = "Dataset")) +
  theme_bw() +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        axis.title.y = element_blank(),
        strip.text = element_text(size = 10))
## Warning: Removed 20 rows containing missing values (geom_point).

Languages: file sizes and lines of code

# Column-name form of each top language, and the per-language column names
# for file counts and mean lines of code
top_langs_as_header <- sapply(top_langs, format_lang_as_header)
lang_cols <- unname(c(paste0("num_files_", top_langs_as_header),
                      paste0("mean_lines_code_", top_langs_as_header)))

# Empty template; it is what remains if no language contributes any rows
plt_data_langs <- data.frame(`Number of files` = integer(),
                             `Mean lines of code per file` = numeric(),
                             is_high_profile = logical(),
                             lang = character())

# One slice per language: that language's two columns under common names,
# tagged with the language, restricted to repos where both values are
# positive (required by the log10 axes below). Slices are collected in a
# list and stacked in a single rbind() after the loop.
lang_slices <- vector("list", length(top_langs_as_header))
slice_idx <- 0
for(lang in top_langs_as_header) {
  col_nf <- paste0("num_files_", lang)
  col_loc <- paste0("mean_lines_code_", lang)
  slice_idx <- slice_idx + 1
  lang_slices[[slice_idx]] <- repo_data_all %>%
    select(`Number of files` = !!as.name(col_nf),
           `Mean lines of code per file` = !!as.name(col_loc),
           is_high_profile) %>%
    mutate(lang = lang) %>%
    filter(`Number of files` > 0 & `Mean lines of code per file` > 0)
}
plt_data_langs <- do.call(rbind, c(list(plt_data_langs), lang_slices))

# One facet per language; both point layers share the same mapping
ggplot(plt_data_langs,
       aes(x = `Number of files`,
           y = `Mean lines of code per file`,
           col = is_high_profile)) +
  geom_point() +
  # High profile repos again, drawn on top
  geom_point(data = subset(plt_data_langs, is_high_profile)) +
  facet_wrap(~lang, scales = "free", ncol = 5) +
  scale_x_log10(breaks = 10^(0:4)) +
  scale_y_log10(breaks = 10^(0:4)) +
  scale_color_manual(values = c(color_main, color_high_prof),
                     labels = c("Main repos", "High profile repos")) +
  guides(color = guide_legend(title = "Dataset")) +
  theme_bw() +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 10),
        strip.text = element_text(size = 10))

Bytes of code by language

# Names of the per-language byte-count columns
lang_cols <- unname(paste0("bytes_", top_langs_as_header))

# Empty template; it is what remains if there are no languages at all
plt_data_lang_bytes <- data.frame(bytes = integer(),
                                  is_high_profile = logical(),
                                  lang = character())

# One slice per language (byte count, dataset flag, language name). The
# slices are collected in a list and stacked once, instead of growing the
# data frame with rbind() on every iteration.
lang_byte_slices <- vector("list", length(top_langs_as_header))
slice_idx <- 0
for(lang in top_langs_as_header) {
  col_b <- paste0("bytes_", lang)
  slice_idx <- slice_idx + 1
  lang_byte_slices[[slice_idx]] <- repo_data_all %>%
    select(!!as.name(col_b), is_high_profile) %>%
    mutate(lang = lang) %>%
    rename(bytes = !!as.name(col_b))
}
plt_data_lang_bytes <- do.call(rbind,
                               c(list(plt_data_lang_bytes), lang_byte_slices))

# Dataset label for the facets. Vectorized: a missing flag gives a missing
# label instead of stopping with an error, and empty input stays a vector.
plt_data_lang_bytes$Dataset <- ifelse(plt_data_lang_bytes$is_high_profile,
                                      "High profile repos",
                                      "Main repos")

# Stacked totals per language, one facet per dataset. The fill values are
# listed in the alphabetical order of the Dataset labels.
ggplot(plt_data_lang_bytes,
       aes(x = lang, y = bytes / 1000000, fill = Dataset)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c(color_high_prof, color_main)) +
  theme_bw() +
  theme(axis.text = element_text(size = 12),
        strip.text = element_text(size = 12),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") +
  xlab("Language") +
  ylab("Total megabytes of code") +
  facet_wrap(~Dataset, scales = "free")

Years by topic

# Number of repos per (year of first commit, year in PubMed, topic).
# Only rows whose topic indicator is TRUE are counted; combinations with a
# missing year are dropped. The result is ungrouped so later steps work on a
# plain table.
plt_data_years_by_topic <- repo_data_main %>%
  select(first_commit, date_pubmed, contains("topic")) %>%
  mutate(year_first_commit = year(first_commit),
         year_pubmed = year(date_pubmed)) %>%
  select(-first_commit, -date_pubmed) %>%
  melt(id.vars = c("year_first_commit", "year_pubmed")) %>%
  filter(value) %>%
  group_by(year_first_commit, year_pubmed, variable) %>%
  dplyr::summarize(num_repos = n()) %>%
  ungroup() %>%
  filter(!is.na(year_first_commit) & !is.na(year_pubmed)) %>%
  rename(Topic = variable)

# Column names to display labels: restore the hyphen in "RNA-seq", drop the
# "topic_" prefix, replace underscores by spaces. gsub() is vectorized, so it
# is applied to the whole column at once.
plt_data_years_by_topic$Topic <- gsub(
  "_", " ",
  gsub("topic_", "",
       gsub("RNA.seq", "RNA-seq",
            as.character(plt_data_years_by_topic$Topic))))

# Common year range so both axes cover the same span in every facet
min_year <- min(c(plt_data_years_by_topic$year_first_commit,
                  plt_data_years_by_topic$year_pubmed))
max_year <- max(c(plt_data_years_by_topic$year_first_commit,
                  plt_data_years_by_topic$year_pubmed))
ggplot(plt_data_years_by_topic) +
  geom_point(aes(x = year_first_commit,
                 y = year_pubmed,
                 size = num_repos)) +
  theme_bw() +
  theme(legend.text = element_text(size = 12),
        legend.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        strip.text = element_text(size = 10)) +
  facet_wrap(~Topic, scales = "fixed", ncol = 2) +
  xlim(min_year, max_year) +
  ylim(min_year, max_year)

Licenses

# Number of repos per license, one facet per dataset
plt_data_licenses <- repo_data_all %>%
  select(license, is_high_profile)
# Label the flag explicitly: FALSE -> "Main repos", TRUE -> "High profile
# repos". Naming the levels keeps the mapping correct even if only one of the
# two values occurs in the data.
plt_data_licenses$is_high_profile <- factor(
  plt_data_licenses$is_high_profile,
  levels = c(FALSE, TRUE),
  labels = c("Main repos", "High profile repos"))
# license is categorical, so count rows with geom_bar();
# geom_histogram(stat = "count") draws the same bars but warns about the
# binning parameters it has to ignore.
ggplot(plt_data_licenses, aes(x = license)) +
  geom_bar() +
  facet_wrap(~is_high_profile, scales = "free") +
  theme_bw() +
  theme(axis.text.y = element_text(size = 10),
        axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text = element_text(size = 10)) +
  ylab("Number of repos") +
  xlab("License")
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Language features

# Execution method table, with language names lower-cased for joining
exec_method <- mutate(
  list_tabledata(project = proj_main,
                 dataset = ds_lang,
                 table = table_exec_method),
  language = tolower(language)
)

# Type system table, with language names lower-cased for joining
type_system <- mutate(
  list_tabledata(project = proj_main,
                 dataset = ds_lang,
                 table = table_type_system),
  language = tolower(language)
)

# One row per top language (lower-cased name plus its column-name form),
# extended with the execution method and type system columns where the
# language name matches
lang_features <- left_join(
  left_join(data.frame(language = tolower(top_langs),
                       lang_header = top_langs_as_header),
            exec_method,
            by = "language"),
  type_system,
  by = "language"
)
## Warning: Column `language` joining factor and character vector, coercing
## into character vector
# Sum one family of per-language columns over a subset of repos.
#   prefix:    column name prefix (e.g. "bytes_"); the column read for each
#              language is the prefix followed by its lang_header value
#   keep_rows: logical vector marking the rows of repo_data_all to include
# Returns one sum per entry of lang_features$lang_header.
# Reads the globals lang_features and repo_data_all.
sum_lang <- function(prefix, keep_rows) {
  # Row positions are the same for every language, so resolve them once
  row_idx <- which(keep_rows)
  sapply(lang_features$lang_header, function(header) {
    col_name <- paste0(prefix, header)
    sum(repo_data_all[row_idx, col_name])
  })
}

# Total bytes and file counts per language, separately for each dataset
lang_features$bytes_high_profile <- sum_lang("bytes_", repo_data_all$is_high_profile)
lang_features$bytes_main <- sum_lang("bytes_", !repo_data_all$is_high_profile)
lang_features$files_high_profile <- sum_lang("num_files_", repo_data_all$is_high_profile)
lang_features$files_main <- sum_lang("num_files_", !repo_data_all$is_high_profile)

# Display labels per language: execution method and type system.
# seq_len() keeps the loop from running when lang_features has no rows
# (1:nrow() would iterate over c(1, 0) in that case).
lang_features$exec <- ""
lang_features$type <- ""
for(i in seq_len(nrow(lang_features))) {

  # Execution method; a missing flag counts as FALSE, and a language with
  # neither flag set gets NA
  interpreted <- isTRUE(lang_features[i, "interpreted"])
  compiled <- isTRUE(lang_features[i, "compiled"])
  if(interpreted && compiled) lang_features[i, "exec"] <- "Both"
  else if(interpreted) lang_features[i, "exec"] <- "Interpreted"
  else if(compiled) lang_features[i, "exec"] <- "Compiled"
  else lang_features[i, "exec"] <- NA

  # Type system: the available parts (capitalized strength, system, safety)
  # joined by spaces. Missing parts are skipped, so a missing strength no
  # longer puts a literal "NA" at the start of the label; the label is NA
  # only when all three parts are missing.
  strength <- lang_features[i, "strength"]
  system <- lang_features[i, "system"]
  safety <- lang_features[i, "safety"]
  type_parts <- c(
    if(!is.na(strength)) capitalize(as.character(strength)),
    if(!is.na(system)) as.character(system),
    if(!is.na(safety)) as.character(safety)
  )
  type <- if(length(type_parts) > 0) paste(type_parts, collapse = " ") else NA
  lang_features[i, "type"] <- type
}

# Share of bytes / files per (execution method, type system) combination,
# computed separately for each measure and dataset.
# The facet label is built with a vectorized mutate(); a row-by-row
# 1:nrow() loop would fail when no language has both labels.
plt_data_lang_features <- lang_features %>%
  select(exec, type, bytes_high_profile,
         bytes_main, files_high_profile, files_main) %>%
  # Languages without an execution method or type system label are left out
  filter(!is.na(exec) & !is.na(type)) %>%
  melt(id.vars = c("exec", "type")) %>%
  # Split the melted column name into the dataset flag and the bare measure
  mutate(is_high_profile = grepl("high_profile", variable)) %>%
  mutate(variable = gsub("_main", "", gsub("_high_profile", "", variable))) %>%
  # Facet label: capitalized measure followed by the dataset
  mutate(var = paste(capitalize(variable),
                     ifelse(is_high_profile,
                            "- high profile repos",
                            "- main repos"))) %>%
  select(exec, type, var, value) %>%
  # Normalize within each facet so the values of one facet sum to 1
  group_by(var) %>%
  mutate(sum_var = sum(value)) %>%
  ungroup() %>%
  mutate(val_normalized = value / sum_var)

# Grid of execution mode (x) by type system (y); point size is the
# normalized share within each facet. One facet per measure and dataset.
ggplot(plt_data_lang_features,
       aes(x = exec,
           y = type,
           size = val_normalized)) +
  geom_point() +
  facet_wrap(~var, scales = "free", ncol = 2) +
  labs(x = "Execution mode",
       y = "Type system") +
  theme_bw() +
  theme(legend.position = "none",
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 11),
        axis.text.x = element_text(angle = 45, hjust = 1),
        strip.text = element_text(size = 11))

Session info

# Record the R version, platform and package versions used for this report
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.2
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2    jsonlite_1.5    Hmisc_4.1-1     Formula_1.2-2  
##  [5] survival_2.41-3 lattice_0.20-35 bigrquery_0.4.1 lubridate_1.7.1
##  [9] ggplot2_2.2.1   dplyr_0.7.4     reshape2_1.4.3 
## 
## loaded via a namespace (and not attached):
##  [1] splines_3.4.3       colorspace_1.3-2    htmltools_0.3.6    
##  [4] yaml_2.1.16         base64enc_0.1-3     rlang_0.1.6        
##  [7] pillar_1.0.1        foreign_0.8-69      glue_1.2.0         
## [10] DBI_0.7             RColorBrewer_1.1-2  bindr_0.1          
## [13] plyr_1.8.4          stringr_1.2.0       munsell_0.4.3      
## [16] gtable_0.2.0        htmlwidgets_0.9     evaluate_0.10.1    
## [19] labeling_0.3        latticeExtra_0.6-28 knitr_1.18         
## [22] curl_3.1            htmlTable_1.11.2    Rcpp_0.12.14       
## [25] acepack_1.4.1       openssl_0.9.9       scales_0.5.0       
## [28] backports_1.1.2     checkmate_1.8.5     gridExtra_2.3      
## [31] digest_0.6.13       stringi_1.1.6       grid_3.4.3         
## [34] rprojroot_1.3-2     tools_3.4.3         magrittr_1.5       
## [37] lazyeval_0.2.1      tibble_1.4.1        cluster_2.0.6      
## [40] pkgconfig_2.0.1     Matrix_1.2-12       data.table_1.10.4-3
## [43] assertthat_0.2.0    rmarkdown_1.8       httr_1.3.1         
## [46] rstudioapi_0.7      R6_2.2.2            rpart_4.1-11       
## [49] nnet_7.3-12         compiler_3.4.3